import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from bidi.algorithm import get_display
from arabic_reshaper import reshape
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from dimension_reduction import DimensionReduction
from sklearn.metrics import classification_report
from sklearn.preprocessing import label_binarize
from sklearn.metrics import roc_curve
from sklearn.metrics import auc
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import LogisticRegression
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.optimizers import Adam
from keras.utils import to_categorical
import seaborn as sns
from collections import Counter
from sklearn.model_selection import GridSearchCV
from sklearn import model_selection
from kneed import KneeLocator
from scipy.cluster.hierarchy import dendrogram, linkage
from sklearn.cluster import KMeans
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import accuracy_score
from sklearn.metrics.cluster import fowlkes_mallows_score
from sklearn.metrics import log_loss
import warnings
# Silence all library warnings globally (sklearn/keras convergence and
# deprecation noise would otherwise flood the notebook output).
warnings.filterwarnings("ignore")
# Load the pre-extracted spectral feature dataset (64 columns): column "0"
# holds the dastgah class label (e.g. "D_0"); the rest are numeric features.
df = pd.read_csv('data_spectral.csv')
df.head()
| 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | ... | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | D_0 | 0.059704 | 0.087381 | 0.001719 | -158.987503 | -3.755985 | -60.604965 | 6.924028 | -40.192768 | -8.211806 | ... | 21.546323 | 24.586190 | 55.200322 | 26.465070 | 13.116643 | 16.736227 | 16.670350 | 11.982480 | 17.727448 | 19.906522 |
| 1 | D_0 | 0.064066 | 0.085583 | 0.001278 | -158.987503 | -3.755985 | -60.604965 | 6.924028 | -40.192768 | -8.211806 | ... | 22.111645 | 24.544797 | 56.097532 | 23.586952 | 13.098344 | 16.962624 | 15.963867 | 12.388751 | 16.783902 | 23.318567 |
| 2 | D_0 | 0.063933 | 0.079730 | 0.001191 | -158.987503 | -3.755985 | -60.604965 | 6.924028 | -40.192768 | -8.211806 | ... | 22.057996 | 25.068478 | 55.802177 | 24.894988 | 13.754883 | 18.469141 | 18.749436 | 11.648222 | 19.297149 | 20.035435 |
| 3 | D_0 | 0.059029 | 0.085087 | 0.001522 | -158.987503 | -3.755985 | -60.604965 | 6.924028 | -40.192768 | -8.211806 | ... | 22.348724 | 25.147534 | 55.096469 | 30.595766 | 13.352426 | 19.142054 | 20.146716 | 14.319667 | 23.763115 | 22.128027 |
| 4 | D_0 | 0.054941 | 0.083997 | 0.001787 | -158.987503 | -3.755985 | -60.604965 | 6.924028 | -40.192768 | -8.211806 | ... | 21.124431 | 23.921915 | 53.962547 | 25.903111 | 12.602666 | 16.513078 | 16.167857 | 11.463181 | 16.514866 | 11.953743 |
5 rows × 64 columns
# Separate features from the target: column "0" is the dastgah class name.
X = df.drop(["0"], axis=1)
y = df["0"]
# Encode the string class names as integers 0..6.
encoder = LabelEncoder()
y = encoder.fit_transform(y)
# 75/25 train/test split with a fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=31)
# Class-frequency bar chart of the training labels.
plt.bar(list(Counter(y_train).keys()), list(Counter(y_train).values()))
plt.xlabel('Dastgah')
plt.ylabel('Count')
plt.title('Dastgah Frequency of Train')
Text(0.5, 1.0, 'Dastgah Frequency of Train')
# Class-frequency bar chart of the test labels.
plt.bar(list(Counter(y_test).keys()), list(Counter(y_test).values()))
plt.xlabel('Dastgah')
plt.ylabel('Count')
plt.title('Dastgah Frequency of Test')
Text(0.5, 1.0, 'Dastgah Frequency of Test')
# Standardize features: fit the scaler on the TRAINING data only, then apply
# the same transformation to the test data.
# BUG FIX: the original fit the scaler on the test set (test-set leakage) and
# transformed the training set with the test set's mean/variance.
scaler = StandardScaler()
X_train = scaler.fit_transform(np.array(X_train, dtype=float))
X_test = scaler.transform(np.array(X_test, dtype=float))
# Mapping from encoded class index to the Persian dastgah name.
# Used only for display; each name is passed through arabic_reshaper +
# python-bidi before plotting so matplotlib renders it correctly.
dastgah = {
    0: "شور",
    1: "سهگاه",
    2: "ماهور",
    3: "همایون",
    4: "راست پنجگاه",
    5: "نوا",
    6: "چهارگاه"
}
def evaluate(model):
    """Print per-class classification reports and the log-loss of a fitted
    classifier on the module-level train and test splits.

    The model must expose both predict() and predict_proba().
    """
    splits = [
        ("Train-------------------------------", X_train, y_train),
        ("Test--------------------------------", X_test, y_test),
    ]
    for idx, (banner, features, labels) in enumerate(splits):
        print("--------------------------------" + banner + "\n")
        print(classification_report(labels, model.predict(features)))
        print("Error:", log_loss(labels, model.predict_proba(features)))
        if idx == 0:
            # Blank separator between the train and test sections.
            print("\n")
def plot_roc_curve(model, X, y, n_classes, title):
    """Plot one-vs-rest ROC curves (with AUC) for every class.

    FIX: the original binarized hard predict() labels, which produces a
    degenerate single-threshold ROC curve. Continuous per-class scores
    (decision_function, else predict_proba) are used when the model provides
    them; binarized hard predictions remain only as a last-resort fallback.
    """
    y = label_binarize(y, classes=np.arange(n_classes))
    if hasattr(model, "decision_function"):
        y_score = model.decision_function(X)
    elif hasattr(model, "predict_proba"):
        y_score = model.predict_proba(X)
    else:
        y_score = label_binarize(model.predict(X), classes=np.arange(n_classes))
    for i in range(n_classes):
        fpr, tpr, thresholds = roc_curve(y[:, i], y_score[:, i])
        roc_auc = auc(fpr, tpr)
        label = 'ROC of class {0} (area={1:0.2f})'.format(i, roc_auc)
        plt.plot(fpr, tpr, label = label)
    # Chance diagonal for reference.
    plt.plot([0, 1], [0, 1], linestyle = '--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve of ' + title)
    plt.legend(loc = 'lower right')
    plt.show()
def display_model_history(history):
    """Plot the train/validation accuracy and loss curves recorded in a
    Keras History object, one figure per metric."""
    curve_specs = [
        ('accuracy', 'val_accuracy', 'Model Accuracy', 'Accuracy'),
        ('loss', 'val_loss', 'Model Loss', 'Loss'),
    ]
    for train_key, val_key, fig_title, y_label in curve_specs:
        plt.figure(figsize=(10, 5))
        plt.plot(history.history[train_key], label = 'Train')
        plt.plot(history.history[val_key], label = 'Validation')
        plt.title(fig_title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()
        plt.show()
def categories_bar_plot(model, X, y, n_classes, title):
    """Grouped bar chart: for each predicted dastgah, the counts of true
    dastgah labels that fell into that prediction.

    `n_classes` is currently unused — kept for call-site compatibility.
    """
    y_pred = model.predict(X)
    results = pd.DataFrame()
    results['Dastgah'] = y
    # Map integer labels to display-ready Persian names (reshape + bidi).
    results['Dastgah']=results['Dastgah'].apply(lambda x: get_display(reshape(dastgah[x])))
    # One 0/1 indicator column per true dastgah name.
    results = pd.get_dummies(results['Dastgah'])
    results['Predicted'] = y_pred
    results['Predicted']=results['Predicted'].apply(lambda x: get_display(reshape(dastgah[x])))
    # Sum the true-label indicator columns within each predicted category.
    results = results.groupby(['Predicted']).sum()
    fig, ax = plt.subplots(1,1)
    results.plot.bar(ax = ax,rot=0,figsize=(10, 4))
    # Put the legend outside the axes to keep the bars readable.
    ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))
    ax.set_xlabel("Predicted Category")
    ax.set_ylabel("Count")
    ax.set_title("Categories bar plot for "+title)
    plt.show()
def heatmap_plot(model, X, y, title):
    """Plot the confusion matrix of `model` on (X, y) as an annotated heatmap,
    using the Persian dastgah names (bidi-shaped) as axis labels.

    NOTE(review): labels are taken from dastgah.values() in dict order —
    presumably matching LabelEncoder's 0..6 order; confirm against the encoder.
    """
    y_pred = model.predict(X)
    cm = confusion_matrix(y, y_pred)
    # Shape the Persian labels so matplotlib renders them right-to-left.
    sortedlabels = [get_display(reshape(label)) for label in dastgah.values()]
    cm = pd.DataFrame(cm, index=[sortedlabels], columns=sortedlabels)
    plt.figure(figsize = (10,5))
    sns.heatmap(cm, linewidths=0.5, annot=True, cmap="Blues", fmt='g')
    # FIX: corrected "Confusiuon" typo in the displayed title.
    plt.title("Confusion Matrix Heatmap for "+title)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.show()
# Grid-search an RBF-kernel, one-vs-rest SVM over C and gamma (10-fold CV).
c = [1e-1, 1e0, 1e1]
gammas = [1e-1, 1e0, 1e1]
model = SVC(kernel = 'rbf', decision_function_shape = 'ovr')
params = dict(C = c, gamma = gammas)
svc = GridSearchCV(model, params, cv = 10)
svc.fit(X_train, y_train)
print('Best parameters for SVM:', svc.best_params_)
Best parameters for SVM: {'C': 10.0, 'gamma': 0.1}
# Refit an SVM with the best grid-search parameters; probability=True is
# required because evaluate() calls predict_proba for the log-loss.
best_svc = SVC(
    kernel = 'rbf',
    decision_function_shape = 'ovr',
    C=svc.best_params_['C'],
    gamma=svc.best_params_['gamma'],
    probability=True
)
best_svc.fit(X_train, y_train)
evaluate(best_svc)
--------------------------------Train-------------------------------
precision recall f1-score support
0 0.99 1.00 0.99 598
1 1.00 0.99 1.00 562
2 0.99 0.99 0.99 633
3 0.99 0.99 0.99 651
4 0.99 0.99 0.99 513
5 0.99 0.99 0.99 623
6 1.00 0.99 1.00 629
accuracy 0.99 4209
macro avg 0.99 0.99 0.99 4209
weighted avg 0.99 0.99 0.99 4209
Error: 0.08663110788142857
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.74 0.84 0.79 192
1 0.87 0.90 0.88 167
2 0.84 0.83 0.84 221
3 0.80 0.85 0.82 213
4 0.83 0.75 0.79 212
5 0.80 0.75 0.77 203
6 0.85 0.81 0.83 195
accuracy 0.82 1403
macro avg 0.82 0.82 0.82 1403
weighted avg 0.82 0.82 0.82 1403
Error: 0.5902545594060903
# Diagnostic plots for the SVM.
# NOTE(review): the ROC curves use the GridSearchCV object `svc` while the
# bar/heatmap plots use the refit `best_svc` — presumably intentional, but
# they are different estimator objects; confirm.
plot_roc_curve(svc, X_train, y_train, n_classes=7, title="Train")
plot_roc_curve(svc, X_test, y_test, n_classes = 7, title="Test")
categories_bar_plot(best_svc, X_train, y_train, n_classes=7, title="Train")
categories_bar_plot(best_svc, X_test, y_test, n_classes=7, title="Test")
heatmap_plot(best_svc, X_train, y_train, title="Train")
heatmap_plot(best_svc, X_test, y_test, title="Test")
# Grid-search K (2..19) for a distance-weighted, Manhattan-metric KNN
# classifier with 10-fold CV.
k_range = np.arange(2, 20)
params = dict(n_neighbors=k_range)
model = KNeighborsClassifier(weights='distance', metric='manhattan')
KNN = GridSearchCV(model, params, cv = 10)
KNN.fit(X_train, y_train)
print('Best K for KNN:', KNN.best_params_)
Best K for KNN: {'n_neighbors': 2}
# Refit KNN with the best K found by the grid search and evaluate it.
best_KNN = KNeighborsClassifier(
    n_neighbors=KNN.best_params_["n_neighbors"],
    weights='distance',
    metric='manhattan'
)
best_KNN.fit(X_train, y_train)
evaluate(best_KNN)
--------------------------------Train-------------------------------
precision recall f1-score support
0 1.00 1.00 1.00 598
1 1.00 1.00 1.00 562
2 1.00 1.00 1.00 633
3 1.00 1.00 1.00 651
4 0.99 1.00 0.99 513
5 1.00 0.99 0.99 623
6 1.00 1.00 1.00 629
accuracy 1.00 4209
macro avg 1.00 1.00 1.00 4209
weighted avg 1.00 1.00 1.00 4209
Error: 0.0032936430532678803
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.83 0.85 0.84 192
1 0.88 0.93 0.90 167
2 0.88 0.88 0.88 221
3 0.85 0.86 0.85 213
4 0.89 0.82 0.85 212
5 0.82 0.84 0.83 203
6 0.87 0.85 0.86 195
accuracy 0.86 1403
macro avg 0.86 0.86 0.86 1403
weighted avg 0.86 0.86 0.86 1403
Error: 3.397213325835309
# Diagnostic plots for KNN (ROC uses the GridSearchCV wrapper, the rest the
# refit best_KNN).
plot_roc_curve(KNN, X_train, y_train, n_classes = 7, title="Train")
plot_roc_curve(KNN, X_test, y_test, n_classes = 7, title="Test")
categories_bar_plot(best_KNN, X_train, y_train, n_classes=7, title="Train")
categories_bar_plot(best_KNN, X_test, y_test, n_classes=7, title="Test")
heatmap_plot(best_KNN, X_train, y_train, title="train")
heatmap_plot(best_KNN, X_test, y_test, title="test")
# Gradient-boosted trees: 300 estimators, all other XGBoost hyperparameters
# left at their defaults.
xgb = XGBClassifier(n_estimators=300)
xgb.fit(X_train, y_train)
XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=300, n_jobs=None, num_parallel_tree=None,
objective='multi:softprob', predictor=None, ...)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. XGBClassifier(base_score=None, booster=None, callbacks=None,
colsample_bylevel=None, colsample_bynode=None,
colsample_bytree=None, early_stopping_rounds=None,
enable_categorical=False, eval_metric=None, feature_types=None,
gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
interaction_constraints=None, learning_rate=None, max_bin=None,
max_cat_threshold=None, max_cat_to_onehot=None,
max_delta_step=None, max_depth=None, max_leaves=None,
min_child_weight=None, missing=nan, monotone_constraints=None,
n_estimators=300, n_jobs=None, num_parallel_tree=None,
objective='multi:softprob', predictor=None, ...)evaluate(xgb)
--------------------------------Train-------------------------------
precision recall f1-score support
0 1.00 1.00 1.00 598
1 1.00 1.00 1.00 562
2 1.00 1.00 1.00 633
3 1.00 1.00 1.00 651
4 0.99 0.99 0.99 513
5 0.99 0.99 0.99 623
6 1.00 1.00 1.00 629
accuracy 1.00 4209
macro avg 1.00 1.00 1.00 4209
weighted avg 1.00 1.00 1.00 4209
Error: 0.008221420472188464
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.73 0.78 0.75 192
1 0.80 0.84 0.82 167
2 0.84 0.77 0.80 221
3 0.74 0.81 0.77 213
4 0.81 0.68 0.74 212
5 0.73 0.78 0.76 203
6 0.75 0.76 0.76 195
accuracy 0.77 1403
macro avg 0.77 0.77 0.77 1403
weighted avg 0.77 0.77 0.77 1403
Error: 0.7622170174145306
# Diagnostic plots for XGBoost.
plot_roc_curve(xgb, X_train, y_train, n_classes = 7, title="Train")
plot_roc_curve(xgb, X_test, y_test, n_classes = 7, title="Test")
categories_bar_plot(xgb, X_train, y_train, n_classes=7, title="Train")
categories_bar_plot(xgb, X_test, y_test, n_classes=7, title="Test")
# Four-hidden-layer MLP (128-64-32-8) trained with SGD + momentum; the
# hyperparameters were presumably hand-tuned (no grid search here).
best_MLP = MLPClassifier(hidden_layer_sizes = (128, 64, 32, 8), batch_size = 16, solver = 'sgd', random_state=4,
verbose=False, momentum=0.85, max_iter=400, learning_rate_init = 0.006)
best_MLP.fit(X_train, y_train)
MLPClassifier(batch_size=16, hidden_layer_sizes=(128, 64, 32, 8),
learning_rate_init=0.006, max_iter=400, momentum=0.85,
random_state=4, solver='sgd')In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. MLPClassifier(batch_size=16, hidden_layer_sizes=(128, 64, 32, 8),
learning_rate_init=0.006, max_iter=400, momentum=0.85,
random_state=4, solver='sgd')evaluate(best_MLP)
--------------------------------Train-------------------------------
precision recall f1-score support
0 0.99 0.99 0.99 598
1 1.00 1.00 1.00 562
2 1.00 0.99 0.99 633
3 0.99 1.00 0.99 651
4 1.00 0.98 0.99 513
5 0.98 1.00 0.99 623
6 1.00 1.00 1.00 629
accuracy 0.99 4209
macro avg 0.99 0.99 0.99 4209
weighted avg 0.99 0.99 0.99 4209
Error: 0.02525958885441375
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.72 0.81 0.76 192
1 0.81 0.80 0.80 167
2 0.78 0.74 0.76 221
3 0.75 0.81 0.78 213
4 0.77 0.67 0.72 212
5 0.69 0.73 0.71 203
6 0.77 0.74 0.75 195
accuracy 0.75 1403
macro avg 0.76 0.76 0.76 1403
weighted avg 0.76 0.75 0.75 1403
Error: 1.646954450207116
# Diagnostic plots for the MLP.
plot_roc_curve(best_MLP, X_train, y_train, n_classes = 7, title="Train")
plot_roc_curve(best_MLP, X_test, y_test, n_classes = 7, title="Test")
categories_bar_plot(best_MLP, X_train, y_train, n_classes=7, title="Train")
categories_bar_plot(best_MLP, X_test, y_test, n_classes=7, title="Test")
heatmap_plot(best_MLP, X_train, y_train, title="Train")
heatmap_plot(best_MLP, X_test, y_test, title="Test")
# Grid-search logistic regression over penalty type and C (10-fold CV).
# NOTE(review): 'l1' is incompatible with the default 'lbfgs' solver —
# presumably those candidates fail under the suppressed warnings; confirm,
# or pass solver='saga' to make the l1 branch meaningful.
model = LogisticRegression()
params = [{'penalty':['l1','l2'], 'C':np.logspace(-4, 4, 12)}]
lr = GridSearchCV(model, params, cv = 10)
lr.fit(X_train, y_train)
print("Best parameters for Logistic Regression:(best parameters) ",lr.best_params_)
Best parameters for Logistic Regression:(best parameters) {'C': 2.310129700083158, 'penalty': 'l2'}
# Refit logistic regression with the best grid-search parameters and evaluate.
best_lr = LogisticRegression( C=lr.best_params_['C'], penalty=lr.best_params_['penalty'])
best_lr.fit(X_train, y_train)
evaluate(best_lr)
--------------------------------Train-------------------------------
precision recall f1-score support
0 0.27 0.27 0.27 598
1 0.27 0.22 0.24 562
2 0.30 0.30 0.30 633
3 0.27 0.35 0.31 651
4 0.25 0.23 0.24 513
5 0.31 0.35 0.33 623
6 0.29 0.24 0.26 629
accuracy 0.28 4209
macro avg 0.28 0.28 0.28 4209
weighted avg 0.28 0.28 0.28 4209
Error: 1.8260369383457316
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.24 0.22 0.23 192
1 0.19 0.19 0.19 167
2 0.28 0.28 0.28 221
3 0.26 0.38 0.31 213
4 0.31 0.24 0.27 212
5 0.32 0.33 0.33 203
6 0.24 0.19 0.21 195
accuracy 0.26 1403
macro avg 0.26 0.26 0.26 1403
weighted avg 0.27 0.26 0.26 1403
Error: 1.8581922051412463
# Diagnostic plots for logistic regression.
plot_roc_curve(best_lr, X_train, y_train, n_classes = 7, title="Train")
plot_roc_curve(best_lr, X_test, y_test, n_classes = 7, title="Test")
categories_bar_plot(best_lr, X_train, y_train, n_classes=7, title="Train")
categories_bar_plot(best_lr, X_test, y_test, n_classes=7, title="Test")
heatmap_plot(best_lr, X_train, y_train, title="Train")
heatmap_plot(best_lr, X_test, y_test, title="Test")
# Hold out 10% of the training split for validation during LSTM training.
X_train_new, X_valid, y_train_new, y_valid = train_test_split(X_train, y_train, test_size=0.1, random_state=31)
# One-hot targets for categorical cross-entropy.
y_train_one_hot = to_categorical(y_train_new)
y_valid_one_hot = to_categorical(y_valid)
# Two stacked LSTM layers treating the 63 feature columns as a length-63
# sequence of scalars, followed by a softmax over the 7 dastgah classes.
model = Sequential()
model.add(LSTM(units=128, dropout=0.1, recurrent_dropout=0.35, return_sequences=True, input_shape=(X_train_new.shape[1], 1)))
model.add(LSTM(units=64, dropout=0.1, recurrent_dropout=0.35, return_sequences=False))
model.add(Dense(units=y_train_one_hot.shape[1], activation="softmax"))
print("Compiling ...")
model.compile(optimizer = 'adam', loss='categorical_crossentropy', metrics = ['accuracy'])
model.summary()
print("Training ...")
history = model.fit(X_train_new, y_train_one_hot, batch_size=64, epochs=80, validation_data = (X_valid, y_valid_one_hot))
Compiling ...
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
lstm_2 (LSTM) (None, 63, 128) 66560
lstm_3 (LSTM) (None, 64) 49408
dense_1 (Dense) (None, 7) 455
=================================================================
Total params: 116,423
Trainable params: 116,423
Non-trainable params: 0
_________________________________________________________________
Training ...
Epoch 1/80
60/60 [==============================] - 15s 156ms/step - loss: 1.9397 - accuracy: 0.1555 - val_loss: 1.9580 - val_accuracy: 0.1544
Epoch 2/80
60/60 [==============================] - 10s 158ms/step - loss: 1.9303 - accuracy: 0.1956 - val_loss: 1.9430 - val_accuracy: 0.1710
Epoch 3/80
60/60 [==============================] - 10s 161ms/step - loss: 1.9190 - accuracy: 0.2088 - val_loss: 1.9367 - val_accuracy: 0.2090
Epoch 4/80
60/60 [==============================] - 10s 172ms/step - loss: 1.9120 - accuracy: 0.2093 - val_loss: 1.9311 - val_accuracy: 0.1971
Epoch 5/80
60/60 [==============================] - 11s 187ms/step - loss: 1.9029 - accuracy: 0.2054 - val_loss: 1.9174 - val_accuracy: 0.2043
Epoch 6/80
60/60 [==============================] - 11s 177ms/step - loss: 1.8885 - accuracy: 0.2228 - val_loss: 1.9099 - val_accuracy: 0.2162
Epoch 7/80
60/60 [==============================] - 10s 164ms/step - loss: 1.8838 - accuracy: 0.2252 - val_loss: 1.9098 - val_accuracy: 0.2114
Epoch 8/80
60/60 [==============================] - 12s 192ms/step - loss: 1.8687 - accuracy: 0.2413 - val_loss: 1.8871 - val_accuracy: 0.2399
Epoch 9/80
60/60 [==============================] - 7s 123ms/step - loss: 1.8643 - accuracy: 0.2368 - val_loss: 1.8835 - val_accuracy: 0.2447
Epoch 10/80
60/60 [==============================] - 8s 134ms/step - loss: 1.8596 - accuracy: 0.2437 - val_loss: 1.8840 - val_accuracy: 0.2447
Epoch 11/80
60/60 [==============================] - 10s 161ms/step - loss: 1.8512 - accuracy: 0.2447 - val_loss: 1.8672 - val_accuracy: 0.2470
Epoch 12/80
60/60 [==============================] - 8s 127ms/step - loss: 1.8399 - accuracy: 0.2569 - val_loss: 1.8623 - val_accuracy: 0.2613
Epoch 13/80
60/60 [==============================] - 7s 117ms/step - loss: 1.8336 - accuracy: 0.2677 - val_loss: 1.8529 - val_accuracy: 0.2660
Epoch 14/80
60/60 [==============================] - 6s 106ms/step - loss: 1.8260 - accuracy: 0.2751 - val_loss: 1.8469 - val_accuracy: 0.2732
Epoch 15/80
60/60 [==============================] - 6s 107ms/step - loss: 1.8123 - accuracy: 0.2698 - val_loss: 1.8577 - val_accuracy: 0.2447
Epoch 16/80
60/60 [==============================] - 6s 103ms/step - loss: 1.8079 - accuracy: 0.2635 - val_loss: 1.8499 - val_accuracy: 0.2708
Epoch 17/80
60/60 [==============================] - 6s 103ms/step - loss: 1.8006 - accuracy: 0.2907 - val_loss: 1.8306 - val_accuracy: 0.2827
Epoch 18/80
60/60 [==============================] - 6s 103ms/step - loss: 1.7877 - accuracy: 0.2864 - val_loss: 1.8258 - val_accuracy: 0.2684
Epoch 19/80
60/60 [==============================] - 6s 105ms/step - loss: 1.7769 - accuracy: 0.2907 - val_loss: 1.8196 - val_accuracy: 0.2755
Epoch 20/80
60/60 [==============================] - 6s 104ms/step - loss: 1.7603 - accuracy: 0.3002 - val_loss: 1.7966 - val_accuracy: 0.2898
Epoch 21/80
60/60 [==============================] - 6s 107ms/step - loss: 1.7422 - accuracy: 0.3155 - val_loss: 1.7772 - val_accuracy: 0.2874
Epoch 22/80
60/60 [==============================] - 7s 109ms/step - loss: 1.7332 - accuracy: 0.3207 - val_loss: 1.7541 - val_accuracy: 0.3135
Epoch 23/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7183 - accuracy: 0.3271 - val_loss: 1.7553 - val_accuracy: 0.2850
Epoch 24/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7190 - accuracy: 0.3197 - val_loss: 1.7602 - val_accuracy: 0.3183
Epoch 25/80
60/60 [==============================] - 6s 102ms/step - loss: 1.7103 - accuracy: 0.3329 - val_loss: 1.7595 - val_accuracy: 0.3135
Epoch 26/80
60/60 [==============================] - 6s 104ms/step - loss: 1.6849 - accuracy: 0.3390 - val_loss: 1.7331 - val_accuracy: 0.3349
Epoch 27/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6600 - accuracy: 0.3561 - val_loss: 1.7230 - val_accuracy: 0.3230
Epoch 28/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6667 - accuracy: 0.3519 - val_loss: 1.7014 - val_accuracy: 0.3610
Epoch 29/80
60/60 [==============================] - 6s 102ms/step - loss: 1.6469 - accuracy: 0.3551 - val_loss: 1.6658 - val_accuracy: 0.3682
Epoch 30/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6317 - accuracy: 0.3717 - val_loss: 1.6701 - val_accuracy: 0.3682
Epoch 31/80
60/60 [==============================] - 6s 103ms/step - loss: 1.6160 - accuracy: 0.3812 - val_loss: 1.6836 - val_accuracy: 0.3444
Epoch 32/80
60/60 [==============================] - 6s 104ms/step - loss: 1.6031 - accuracy: 0.3928 - val_loss: 1.6641 - val_accuracy: 0.3705
Epoch 33/80
60/60 [==============================] - 6s 102ms/step - loss: 1.5778 - accuracy: 0.3838 - val_loss: 1.6439 - val_accuracy: 0.3705
Epoch 34/80
60/60 [==============================] - 6s 105ms/step - loss: 1.5812 - accuracy: 0.3997 - val_loss: 1.6293 - val_accuracy: 0.3634
Epoch 35/80
60/60 [==============================] - 6s 105ms/step - loss: 1.5575 - accuracy: 0.4124 - val_loss: 1.6441 - val_accuracy: 0.3634
Epoch 36/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5316 - accuracy: 0.4232 - val_loss: 1.6098 - val_accuracy: 0.4086
Epoch 37/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5330 - accuracy: 0.4240 - val_loss: 1.5827 - val_accuracy: 0.4181
Epoch 38/80
60/60 [==============================] - 6s 103ms/step - loss: 1.5223 - accuracy: 0.4248 - val_loss: 1.6219 - val_accuracy: 0.3895
Epoch 39/80
60/60 [==============================] - 6s 102ms/step - loss: 1.5029 - accuracy: 0.4322 - val_loss: 1.5724 - val_accuracy: 0.4276
Epoch 40/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4931 - accuracy: 0.4488 - val_loss: 1.5608 - val_accuracy: 0.3990
Epoch 41/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4783 - accuracy: 0.4501 - val_loss: 1.5918 - val_accuracy: 0.3872
Epoch 42/80
60/60 [==============================] - 6s 102ms/step - loss: 1.4660 - accuracy: 0.4472 - val_loss: 1.5513 - val_accuracy: 0.4181
Epoch 43/80
60/60 [==============================] - 6s 103ms/step - loss: 1.4480 - accuracy: 0.4575 - val_loss: 1.5449 - val_accuracy: 0.4276
Epoch 44/80
60/60 [==============================] - 6s 104ms/step - loss: 1.4390 - accuracy: 0.4636 - val_loss: 1.5481 - val_accuracy: 0.4252
Epoch 45/80
60/60 [==============================] - 6s 105ms/step - loss: 1.4186 - accuracy: 0.4815 - val_loss: 1.5357 - val_accuracy: 0.4418
Epoch 46/80
60/60 [==============================] - 6s 104ms/step - loss: 1.4125 - accuracy: 0.4768 - val_loss: 1.5050 - val_accuracy: 0.4466
Epoch 47/80
60/60 [==============================] - 6s 107ms/step - loss: 1.3947 - accuracy: 0.4836 - val_loss: 1.5167 - val_accuracy: 0.4442
Epoch 48/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3990 - accuracy: 0.4781 - val_loss: 1.5101 - val_accuracy: 0.4323
Epoch 49/80
60/60 [==============================] - 6s 102ms/step - loss: 1.3632 - accuracy: 0.5008 - val_loss: 1.5066 - val_accuracy: 0.4584
Epoch 50/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3793 - accuracy: 0.4865 - val_loss: 1.5062 - val_accuracy: 0.4418
Epoch 51/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3494 - accuracy: 0.5135 - val_loss: 1.5027 - val_accuracy: 0.4347
Epoch 52/80
60/60 [==============================] - 7s 109ms/step - loss: 1.3354 - accuracy: 0.5140 - val_loss: 1.4770 - val_accuracy: 0.4632
Epoch 53/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3242 - accuracy: 0.5182 - val_loss: 1.4852 - val_accuracy: 0.4513
Epoch 54/80
60/60 [==============================] - 6s 104ms/step - loss: 1.3087 - accuracy: 0.5238 - val_loss: 1.4786 - val_accuracy: 0.4394
Epoch 55/80
60/60 [==============================] - 6s 103ms/step - loss: 1.3072 - accuracy: 0.5230 - val_loss: 1.4385 - val_accuracy: 0.4703
Epoch 56/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2965 - accuracy: 0.5267 - val_loss: 1.4661 - val_accuracy: 0.4632
Epoch 57/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2834 - accuracy: 0.5362 - val_loss: 1.4438 - val_accuracy: 0.4703
Epoch 58/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2562 - accuracy: 0.5372 - val_loss: 1.4640 - val_accuracy: 0.4846
Epoch 59/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2536 - accuracy: 0.5544 - val_loss: 1.4594 - val_accuracy: 0.4774
Epoch 60/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2214 - accuracy: 0.5576 - val_loss: 1.4059 - val_accuracy: 0.4988
Epoch 61/80
60/60 [==============================] - 6s 101ms/step - loss: 1.2269 - accuracy: 0.5626 - val_loss: 1.4477 - val_accuracy: 0.4703
Epoch 62/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2292 - accuracy: 0.5494 - val_loss: 1.4326 - val_accuracy: 0.5107
Epoch 63/80
60/60 [==============================] - 6s 103ms/step - loss: 1.2005 - accuracy: 0.5641 - val_loss: 1.4012 - val_accuracy: 0.4941
Epoch 64/80
60/60 [==============================] - 6s 102ms/step - loss: 1.2024 - accuracy: 0.5694 - val_loss: 1.4156 - val_accuracy: 0.4917
Epoch 65/80
60/60 [==============================] - 6s 102ms/step - loss: 1.1996 - accuracy: 0.5694 - val_loss: 1.4263 - val_accuracy: 0.5083
Epoch 66/80
60/60 [==============================] - 7s 109ms/step - loss: 1.1822 - accuracy: 0.5726 - val_loss: 1.4166 - val_accuracy: 0.4941
Epoch 67/80
60/60 [==============================] - 7s 112ms/step - loss: 1.1679 - accuracy: 0.5829 - val_loss: 1.3963 - val_accuracy: 0.5012
Epoch 68/80
60/60 [==============================] - 7s 122ms/step - loss: 1.1540 - accuracy: 0.5884 - val_loss: 1.4146 - val_accuracy: 0.5321
Epoch 69/80
60/60 [==============================] - 7s 118ms/step - loss: 1.1224 - accuracy: 0.6043 - val_loss: 1.3967 - val_accuracy: 0.5321
Epoch 70/80
60/60 [==============================] - 7s 110ms/step - loss: 1.1349 - accuracy: 0.5987 - val_loss: 1.4060 - val_accuracy: 0.5107
Epoch 71/80
60/60 [==============================] - 7s 111ms/step - loss: 1.1110 - accuracy: 0.6088 - val_loss: 1.3867 - val_accuracy: 0.5321
Epoch 72/80
60/60 [==============================] - 8s 126ms/step - loss: 1.1203 - accuracy: 0.6043 - val_loss: 1.4027 - val_accuracy: 0.5297
Epoch 73/80
60/60 [==============================] - 7s 111ms/step - loss: 1.1215 - accuracy: 0.5971 - val_loss: 1.3838 - val_accuracy: 0.5344
Epoch 74/80
60/60 [==============================] - 7s 124ms/step - loss: 1.1204 - accuracy: 0.5979 - val_loss: 1.3889 - val_accuracy: 0.5297
Epoch 75/80
60/60 [==============================] - 7s 115ms/step - loss: 1.0938 - accuracy: 0.6125 - val_loss: 1.3784 - val_accuracy: 0.5249
Epoch 76/80
60/60 [==============================] - 6s 106ms/step - loss: 1.0764 - accuracy: 0.6169 - val_loss: 1.3697 - val_accuracy: 0.5321
Epoch 77/80
60/60 [==============================] - 8s 131ms/step - loss: 1.0525 - accuracy: 0.6257 - val_loss: 1.4134 - val_accuracy: 0.5392
Epoch 78/80
60/60 [==============================] - 7s 108ms/step - loss: 1.0696 - accuracy: 0.6185 - val_loss: 1.3743 - val_accuracy: 0.5273
Epoch 79/80
60/60 [==============================] - 8s 131ms/step - loss: 1.0389 - accuracy: 0.6341 - val_loss: 1.3984 - val_accuracy: 0.5344
Epoch 80/80
60/60 [==============================] - 7s 108ms/step - loss: 1.0412 - accuracy: 0.6333 - val_loss: 1.3622 - val_accuracy: 0.5511
# Plot the accuracy/loss curves, then report train-set performance.
display_model_history(history)
# Hard predictions = argmax over the softmax outputs.
y_pred = np.argmax(model.predict(X_train), axis = 1)
print("--------------------------------Train-------------------------------\n")
print(classification_report(y_train, y_pred))
132/132 [==============================] - 3s 22ms/step
--------------------------------Train-------------------------------
precision recall f1-score support
0 0.78 0.78 0.78 598
1 0.75 0.78 0.77 562
2 0.76 0.78 0.77 633
3 0.73 0.72 0.73 651
4 0.81 0.70 0.75 513
5 0.76 0.78 0.77 623
6 0.77 0.80 0.79 629
accuracy 0.76 4209
macro avg 0.77 0.76 0.76 4209
weighted avg 0.76 0.76 0.76 4209
# Test-set performance of the LSTM (argmax over softmax outputs).
y_pred = np.argmax(model.predict(X_test), axis = 1)
print("--------------------------------Test--------------------------------\n")
print(classification_report(y_test, y_pred))
44/44 [==============================] - 1s 18ms/step
--------------------------------Test--------------------------------
precision recall f1-score support
0 0.55 0.56 0.55 192
1 0.59 0.67 0.63 167
2 0.65 0.65 0.65 221
3 0.55 0.58 0.56 213
4 0.66 0.44 0.53 212
5 0.51 0.58 0.54 203
6 0.58 0.58 0.58 195
accuracy 0.58 1403
macro avg 0.58 0.58 0.58 1403
weighted avg 0.58 0.58 0.58 1403
# Project the scaled features onto 2 LDA components for clustering /
# visualization (DimensionReduction is a project-local helper).
dr = DimensionReduction(X_train, y_train, X_test, y_test)
X_train_lda, X_test_lda = dr.LDA(n_components=2)
Determining the ideal number of clusters:
def calculate_erros(data, kmax):
    """Return the within-cluster sum of squares (WCSS) of K-Means for each
    k in 1..kmax, for use with the elbow method.

    FIX: the original re-derived the WCSS with a per-point Python loop that
    summed squared distances over ONLY the first two feature columns — a
    hard-coded 2-D assumption. KMeans.inertia_ is exactly the WCSS over all
    dimensions, so this is identical for 2-D inputs (e.g. the 2-component
    LDA projection used below) and correct for any dimensionality.
    """
    wse = []
    for k in range(1, kmax + 1):
        kmeans = KMeans(n_clusters=k, init='k-means++', algorithm='elkan', max_iter=100).fit(data)
        wse.append(kmeans.inertia_)
    return wse
# Elbow method: compute WCSS for k = 1..20 and locate the knee of the curve.
list_of_wss = calculate_erros(X_train_lda, 20)
optimal_k = KneeLocator([i for i in range(1, 21)], list_of_wss, curve='convex', direction='decreasing').knee
print("Optimal number of clusters: ", optimal_k)
plt.plot(range(1, 21), list_of_wss, marker='o', linestyle='--')
plt.xlabel('number of clusters')
plt.ylabel('Within Cluster Sum of Squares (WCSS)')
plt.xticks(range(1, 21))
plt.show()
Optimal number of clusters: 6
def plot_dendrogram(train_data, test_data, num_of_features):
    """Draw Ward-linkage dendrograms for the train and test data side by side,
    truncated to the last merged clusters for readability.

    FIX: corrected the "Dendogram" typo in the displayed axis titles.
    """
    fig, axes = plt.subplots(ncols=2, nrows=1, figsize=(25, 10))
    panels = ((axes[0], train_data, 'Train'), (axes[1], test_data, 'Test'))
    for ax, data, split in panels:
        ax.set_title('Dendrogram for ' + split + ' Data with ' + str(num_of_features) + ' features')
        clusters = linkage(data, method='ward')
        dendrogram(clusters, truncate_mode='lastp', show_contracted=True, ax=ax, no_labels=True)
    plt.show()
def evaluate_clustering(model, number_of_clusters, model_name, d=2):
    """Scatter-plot the model's cluster assignments in LDA space (2-D or 3-D,
    chosen by `d`) for the train and test splits, titling each panel with
    the Fowlkes-Mallows score against the true labels.

    The model is (re-)fit on each split's full-dimensional features
    (X_train / X_test) while points are drawn at their LDA coordinates.

    FIX: the original called fit_predict twice per panel (once for colors,
    once for the score), so for non-deterministic inits the plotted clusters
    and the reported score could disagree; the labels are now computed once
    and reused. Title spelling is also unified to "Fowlkes Mallows Score".
    """
    panels = [
        ("Train", X_train, y_train, X_train_lda),
        ("Test", X_test, y_test, X_test_lda),
    ]
    if d == 3:
        fig, ax = plt.subplots(1, 2, figsize=(12, 9), subplot_kw=dict(projection='3d'))
    else:
        fig, ax = plt.subplots(1, 2, figsize=(12, 6))
    for i, (split, features, labels, coords) in enumerate(panels):
        cluster_labels = model.fit_predict(features)
        score = fowlkes_mallows_score(labels, cluster_labels)
        if d == 3:
            ax[i].scatter(coords[:, 0], coords[:, 1], coords[:, 2], c=cluster_labels)
        else:
            ax[i].scatter(coords[:, 0], coords[:, 1], c=cluster_labels)
        ax[i].set_title(f"{model_name} {split}\nNumber of Clusters: {number_of_clusters}\nFowlkes Mallows Score: {score}")
    fig.tight_layout()
    plt.show()
# K-Means on the 2-component LDA projection for several cluster counts
# (including the true class count, 7).
clusters = [2, 7, 20]
for n_clusters in clusters:
    kMeans = KMeans(n_clusters=n_clusters, init='k-means++', algorithm='elkan', max_iter=100)
    kMeans.fit(X_train_lda)
    evaluate_clustering(kMeans, number_of_clusters=n_clusters, model_name="K-Means")
plt.show()
# Agglomerative (Ward) clustering for the same cluster counts, then the
# dendrograms of the 2-component LDA data.
for n_clusters in clusters:
    agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
    agglomerative.fit(X_train_lda)
    evaluate_clustering(agglomerative, number_of_clusters=n_clusters, model_name="Agglomerative")
plt.show()
plot_dendrogram( X_train_lda,X_test_lda,num_of_features=2)
# Repeat the whole clustering analysis with a 3-component LDA projection.
dr = DimensionReduction(X_train, y_train, X_test, y_test)
X_train_lda, X_test_lda = dr.LDA(n_components=3)
for n_clusters in clusters:
    kMeans = KMeans(n_clusters=n_clusters, init='k-means++', algorithm='elkan', max_iter=100)
    kMeans.fit(X_train_lda)
    evaluate_clustering(kMeans, number_of_clusters=n_clusters, model_name="K-Means", d=3)
plt.show()
for n_clusters in clusters:
    agglomerative = AgglomerativeClustering(n_clusters=n_clusters)
    agglomerative.fit(X_train_lda)
    evaluate_clustering(agglomerative, number_of_clusters=n_clusters, model_name="Agglomerative", d=3)
plt.show()
plot_dendrogram( X_train_lda,X_test_lda,num_of_features=3)